home *** CD-ROM | disk | FTP | other *** search
Text File | 1996-01-30 | 61.3 KB | 1,930 lines |
- -- -----------------------------------------------------------------------
- -- Title: Float_elementary_functions
- -- Last Mod: Thu Apr 19 11:26:27 1990
- -- Author: Vincent Broman <broman@nosc.mil>
- -- Copyright 1990 Vincent Broman
- -- Permission granted to copy, modify, or compile this software for
- -- one's own use, provided that this copyright notice is preserved intact.
- -- Permission granted to distribute compiled binary copies of this
- -- software which are linked in with some other application.
- -- Permission granted to distribute other copies of this software,
- -- provided that (1) any copy which is not source code, i.e. not in the
- -- form in which the software is usually maintained, must be accompanied
- -- by a copy of the source code from which it was compiled, and (2) the
- -- one distributing it must refrain from imposing on the recipient
- -- further restrictions on the distribution of this software.
- --
- -- Description:
- --
- -- a Float-precision-only implementation of
- -- the proposed standard generic package of elementary functions,
- -- from the ISO-IEC/JTC1/SC22/WG9 (Ada)
- -- Numerics Rapporteur Group proposal, Draft 1.1.
- --
- -- This will evolve to conform to later drafts.
- --
- -- Exceptions: argument_error, numeric_error are raised.
- -- -----------------------------------------------------------------------
- --
- with math_constants; use math_constants;
- with primitive_functions;
- use primitive_functions;
-
- package body Math is
-
-
- overflow_error: exception renames numeric_error;
- -- AI-00387 specifies constraint_error for floating overflows.
-
- fminexp: constant Float := Float( Float'machine_emin - 1);
- fmaxexp: constant Float := Float( Float'machine_emax);
-
- mant: constant := Float'machine_mantissa;
- recip_sqrt_epsilon: constant Float := 2.0 ** ((mant - 1) / 2);
-
- two_pi: constant := 2.0 * pi;
- half_pi: constant := pi / 2.0;
-
- subtype octant_number is integer range 0 .. 7;
-
-
- function sqr (x: Float) return Float is
- --
- begin
- return x * x;
- end sqr;
- pragma inline (sqr);
-
-
- ----------------------------------------------------------------------
- -- high precision argument reduction routines
-
-
- procedure octant_reduce( x: in Float;
- r: out Float;
- nmod8: out octant_number) is
- --
- -- Octant_reduce reduces x to the interval [0, pi/4] with the high
- -- absolute accuracy needed by trigonometric functions computed in radians
- -- which are subject to stringent relative accuracy requirements.
- --
- -- R is in 0.0 .. pi/4 and, to within the accuracy described below,
- -- either (abs( x) - r) is equal to pi/4 times an even integer
- -- or else (abs( x) - (pi/4 - r)) is equal to pi/4 times an odd integer.
- -- The angles in odd octants are complemented relative to pi/4 so that
- -- small sines and cosines may be computed from small angles,
- -- i.e. r is small anywhere near the x and y coordinate axes.
- -- Further, r=0 iff x=0, because of the way huge arguments get reduced.
- --
- -- NMOD8 is the octant number of x, in 0 .. 7, i.e.
- -- for x >= 0 nmod8 is truncate( x / (pi/4)) mod 8,
- -- for x < 0 nmod8 is 7 - truncate( - x / (pi/4)) mod 8.
- --
- -- The relative accuracy of the reduced angle corresponding to (r,nmod8)
- -- is better than 2 ulp if abs( x) < 6433 < pi/4*2**13.
- --
- -- The inexactness of (r,nmod8) is caused entirely by
- -- (1) inexact reduction of huge arguments into the interval [-6433,6433].
- -- (2) the difference of 1.01e-21 between pi/4 and sum( pterm),
- -- (3) the two roundings in producing r itself.
- -- If abs(x) < 6433, the absolute error in r caused by (1) and (2)
- -- is less than (sum(pterm)-pi/4)*abs(x)/(pi/4) < 8.3e-18 .
- --
- -- On (0, 22000) the maximum value of abs( x / (sin( x)*cos( x)))
- -- for x a Float precision floating point number is less than 6.1e10,
- -- so the maximum relative error in r before rounding in (3) is less than
- -- 6.1e10*1.01e-21/(pi/4) < 7.85e-11, which is .00066 ulp.
- --
- -- For arguments abs( x) > 6433, x is reduced and rounded more than
- -- once, so an extra absolute error is introduced, bounded by:
- -- max( 2**(exponent(2pi)-1-24), abs(x)*2**(-13+1-24)),
- -- where the former bound applies if reducing by whole periods produces a
- -- remainder small than 2pi, the latter bound otherwise.
- --
- -- Test your own system with 161*pi/2 to see how well argument reduction
- -- is done. You should get cos(252.8982086181640625) =~ -4.1857068922e-9 .
- --
- nbits: constant := 13; -- significant bits in n
- oneminus: constant := 1.0 - 3.0 * Float'epsilon; -- 3 minimal?
- toobig: constant := pi / 4.0 * (2.0 ** nbits) * oneminus; -- 6433.3+
- p: constant Float := pi / 4.0;
- ax: Float := abs( x);
- n: Float;
- odd_n: boolean;
-
- function truncate_nbits (x: Float) return Float is
- --
- -- truncate to an integer with nbits or fewer significant bits
- --
- begin -- truncate_nbits
- if exponent( x) < nbits then
- return truncate( x);
- else
- return shorten( x, nbits);
- end if;
- end truncate_nbits;
-
- function reduce_by_n (x: Float;
- n: Float) return Float is
- --
- -- compute x - n * sum(pterm(k),k=1..7)
- -- erring by less than two roundoffs in the final result.
- -- x and n are assumed nonnegative. n has an nbit-length integer value.
- --
- type softreal is array( 1 .. 7) of Float;
- --
- -- The high precision eighth-period:
- --
- -- sum(pterm(k),k=1..7) = pi/4 rounded to 68 bits, in 7 8-bit pieces.
- pterm: constant softreal :=
- (16#0.c9000000000000000#,
- 16#0.000fd000000000000#,
- 16#0.00000aa0000000000#,
- 16#0.00000002200000000#,
- 16#0.00000000016800000#,
- 16#0.000000000000c2000#,
- 16#0.0000000000000034c#);
- --
- -- each product of a 13 bit integer and an 8 bit pterm(k) computed below
- -- lands exactly on a 21 bit model number, no roundoff.
- --
- diff: Float := x;
- sumtail: Float := 0.0;
- prod, nextdiff: Float;
-
- begin -- reduce_by_n
- for k in softreal'range loop
- -- combine largest terms while the subtraction is still exact,
- -- then sum tail to combine with head all at once.
- prod := n * pterm( k);
- nextdiff := diff - prod;
- if abs( nextdiff) > prod then
- -- nextdiff might be inexact
- for m in reverse k + 1 .. softreal'last loop
- sumtail := sumtail + n * pterm( m);
- end loop;
- return diff - (prod + sumtail); -- two roundings
- end if;
- diff := nextdiff; -- difference is still exact
- end loop;
- return diff;
- end reduce_by_n;
-
- function mod8 (x: Float) return octant_number is
- --
- -- return x mod 8 for x having a nonnegative integer value.
- --
- xo8: Float := x * (1.0 / 8.0);
-
- begin -- mod8
- return octant_number( (xo8 - truncate( xo8)) * 8.0);
- end mod8;
-
-
- begin -- octant_reduce
- -- if ax too big to handle accurately then reduce by whole periods first
- while ax > toobig loop
- n := 8.0 * truncate_nbits( ax / (8.0 * p) * oneminus); -- n < ax / p
- ax := reduce_by_n( ax, n);
- end loop;
- --
- -- now ax < pi/4*2**nbits
- --
- n := truncate( ax / p); -- 13 or fewer bits
- odd_n := odd( n);
-
- if odd_n then
- -- odd octant, complement angle wrt pi/4
- -- p - (ax - n*p) = -ax + (n+1)*p
- ax := - reduce_by_n( ax, n + 1.0);
- elsif n > 0.0 then
- -- for even nbr of octants, ax - n*p
- ax := reduce_by_n( ax, n);
- else -- n = 0
- null;
- end if;
-
- if ax < 0.0 then
- -- initial n was off by 1 because of roundoff in ax/p
- r := - ax;
- if odd_n then
- n := n + 1.0;
- else
- n := n - 1.0;
- end if;
- elsif ax > p then
- -- initial n was off by 1 because of roundoff in ax/p
- -- want 2 * p - ax
- r := - reduce_by_n( ax, 2.0);
- if odd_n then
- n := n - 1.0;
- else
- n := n + 1.0;
- end if;
- else
- -- initial n correct, vast majority of cases
- r := ax;
- end if;
-
- if x >= 0.0 then
- nmod8 := mod8( n);
- else
- nmod8 := octant_number'last - mod8( n);
- end if;
-
- end octant_reduce;
-
-
- procedure period_reduce( x: in Float;
- p: in Float;
- r: out Float;
- n: out octant_number) is
- --
- -- Period_reduce reduces x to the interval [0, p/8] with the high
- -- absolute accuracy needed by trigonometric functions
- -- which are subject to stringent relative accuracy requirements.
- --
- -- X is the input angle, in units such that p is the cyclic period.
- -- Execution is erroneous if p is not positive.
- -- R is in 0.0 .. p/8 and, to within the accuracy described below,
- -- either (abs( x) - r) is equal to p/8 times an even integer
- -- or else (abs( x) - (p/8 - r)) is equal to p/8 times an odd integer.
- -- The angles in odd octants are complemented relative to p/8 so that
- -- small sines and cosines may be computed from small angles,
- -- i.e. r is small anywhere near the x and y coordinate axes.
- --
- -- N is the octant number of x, in 0 .. 7, i.e.
- -- for x >= 0 n is truncate( x / (p/8)) mod 8,
- -- for x < 0 n is 7 - truncate( - x / (p/8)) mod 8.
- --
- -- The reduced angle corresponding to (r,n) is rounded and
- -- correct up to the relative machine accuracy of r.
- -- All floating point arithmetic used is exact except possibly for
- -- (1) complementing r in an odd octant, computing (p/8 - r1),
- -- (2) the last reductions, if p/8 loses bits by denormalization.
- --
- r1: Float := abs( x);
- exponent_diff: constant integer := exponent( r1) - exponent( p);
- p1: Float := scale( p, exponent_diff); -- p1 & r1 now have same exponent
- s: integer := exponent_diff + 3;
-
- octant: octant_number := 0;
-
- begin -- period reduce
- if s >= 0 then
- -- We repeatedly subtract (p times a power of 2) from r.
- loop
- if r1 >= p1 then
- -- It is assumed that the floating point subtraction of
- -- two positives (a-b) is exact if either
- -- (1) exponent(a) = exponent(b) or
- -- (2) exponent(a)-1 = exponent(b) >= exponent(a-b).
- -- This is the case for (r1-p1).
- r1 := r1 - p1;
- --
- case s is
- when 0 =>
- r1 := p1 - r1; -- odd octant, complement
- octant := octant + 1;
- when 1 =>
- octant := octant + 2;
- when 2 =>
- octant := octant + 4;
- when others =>
- null;
- end case;
- end if; -- if r1 >= p1
-
- exit when s = 0;
- s := s - 1;
- p1 := p1 * 0.5; -- could scale if that's faster
-
- end loop; -- down s
-
- r := r1;
-
- else
- r := r1; -- small angle, no reduction
-
- end if; -- if s nonnegative
-
- if x >= 0.0 then
- n := octant;
- else
- n := octant_number'last - octant; -- no effect on odd_octant
- end if;
-
- end period_reduce;
-
-
- procedure ln2_reduce( x: in Float;
- r: out Float;
- n: out integer) is
- --
- -- Ln2_reduce reduces x to an interval of width approximately ln(2),
- -- with the high absolute accuracy needed by an exponential function which is
- -- subject to stringent relative accuracy requirements.
- --
- -- This routine requires abs( x) < 89.0 for spec accuracy.
- --
- -- Upon return, x = r + n * ln(2),
- -- to within an absolute accuracy of 1 * epsilon (using the Ada model),
- -- or more realistically to within 6e-8 .
- -- Also, abs( r) <= nat_log_2 / 2 + 256 * epsilon,
- -- and n is in the range -128 .. 128,
- --
- -- The inexactness of the resultant (r,n) is caused entirely by
- -- (1) the negligible difference of about 5.5e-14 between ln(2) and ln2,
- -- (2) the 1 or 2 roundings in producing the result r itself.
- --
- ln2_a: constant := 16#0.b1700000000#;
- ln2_b: constant := 16#0.000217f0000#;
- ln2_c: constant := 16#0.00000007d1c#;
- -- ln(2) = ln2_a+ln2_b+ln2_c accurate to 43 bits, in 3 14-bit pieces.
-
- nrd: Float := floor( x / nat_log_2 + 0.5);
-
- begin -- ln2_reduce
- r := ((x - ln2_a * nrd) - ln2_b * nrd) - ln2_c * nrd;
- -- The multiplications did happen exactly, using model numbers.
- -- The a term subtracts exactly. The c term is so small
- -- that a bound on the absolute error can depend only on the size
- -- of the result of the second subtraction. So, two roundoffs
- -- make the absolute error < 1*epsilon, pessimistically.
- n := integer( nrd);
- -- if we wanted to eliminate slop in the output interval,
- -- then just test r, adjust n, and repeat the reduction.
- end ln2_reduce;
-
-
- function complement (x: in Float) return Float is
- -- compute sqrt(1-x**2)
- ax: constant Float := abs( x);
- begin
- if ax >= 0.5 then
- declare
- t: constant Float := 1.0 - ax;
- begin
- return sqrt( 2.0 * t - t * t);
- end;
- else
- return sqrt( (1.0 + x) * (1.0 - x));
- end if;
- end complement;
-
-
- ----------------------------------------------------------------------
- -- functions on reduced or simplified domains
-
-
- function reduced_logdel_2 (y: in Float) return Float is
- --
- -- return the base-2 logarithm of (1+y)/(1-y),
- -- y in the interval [ -3+sqrt(8) .. 3-sqrt(8) ].
- -- the coefficients are from a rational (even) 1,1 approximation to
- -- log_2( (1+y)/(1-y) )/y with relative error (omitting roundoff) < 1.91e-8.
- -- this delivers high relative accuracy for small y.
- -- Note: reduced_logdel_2( 0.0) = 0.0 exactly.
- --
- p0: constant := -4.7845025;
- p1: constant := 1.2906108;
- q0: constant := -1.6581823;
- y2: constant Float := y * y;
-
- begin
- return y * (p0 + p1 * y2) / (q0 + y2);
- end reduced_logdel_2;
-
-
- function reduced_log_2 (x: in Float) return Float is
- --
- -- return the base-2 logarithm of x, x in the interval [sqrt(1/2),sqrt(2)].
- --
- begin
- return reduced_logdel_2( (x - 1.0) / (x + 1.0));
- end reduced_log_2;
-
- procedure log_2_parts (int_part: out Float;
- frac_part: out Float;
- x: in Float) is
- --
- -- return the integer and fractional part of the
- -- base-2 logarithm of x.
- -- int_part is a floating pt integer, and abs( frac_part) <= 1/2.
- --
- ex: integer := exponent( x);
- ma: Float := mantissa( x);
- sqrt_half: constant := sqrt_2 / 2.0;
-
- begin
- if ma < sqrt_half then
- ma := 2.0 * ma;
- ex := ex - 1;
- end if;
- -- sqrt_half <= ma < sqrt_2
- -- and x = ma * 2 ** ex
- int_part := Float( ex);
- frac_part := reduced_log_2( ma);
- end log_2_parts;
-
- function log_2 (x: in Float) return Float is
- --
- -- return the base-2 logarithm of x, assuming x > 0.0
- --
- -- the value is exact for x an integer power of 2.
- --
- n, f: Float;
-
- begin -- log_2
- log_2_parts( n, f, x);
- return n + f;
- end log_2;
-
-
- function reduced_two_to_the (x: in Float) return Float is
- --
- -- return 2.0**x for x in the interval [-0.524, 0.524].
- -- the coefficients are a Remez-optimized rational(3,2) approximation,
- -- with relative error (omitting roundoff) < 1e-8.
- -- The interval would be [-1/2, 1/2] but we allow some slop.
- --
- p0: constant := 41.694187;
- p1: constant := 17.342325;
- p2: constant := 3.0047081;
- p3: constant := 0.23083161;
- q0: constant := p0; -- assures 2**0 = 1
- q1: constant := -11.5578823;
-
- begin
- return (p0 + x * (p1 + x * (p2 + x * p3)))
- / (q0 + x * (q1 + x));
- end reduced_two_to_the;
-
-
- function two_to_the (x: in Float) return Float is
- --
- -- return 2.0**x, underflowing to zero for very negative x,
- -- and overflowing for very positive x.
- --
- ix: Float := floor( x + 0.5);
- frac: Float := x - ix;
-
- begin
- return scale( reduced_two_to_the( frac), integer( ix));
- end two_to_the;
-
-
- function reduced_sin (x: in Float) return Float is
- --
- -- return sin x for x in the interval [0,pi/4].
- -- the coefficients are a chebychev-economized taylor series for
- -- sin((1-x)*pi/8)/((1-x)*pi/8) on [-1,1],
- -- with absolute error (omitting roundoff) < 2e-8.
- -- this gives high relative accuracy for small x.
- --
- -- scale factor for size of interval [0,pi/4] already reckoned in below
- a0: constant := 9.74495337094254e-01;
- a1: constant := 1.28892138844516e-01;
- a2: constant := -1.59024045807998e-01;
- a3: constant := -1.28509163870500e-02;
- a4: constant := 7.83587003622433e-03;
- a5: constant := 4.55929708079455e-04;
-
- pio8: constant := pi / 8.0;
- y: constant Float := pio8 - x;
-
- begin
- return x * (a0 + y * (a1
- + y * (a2
- + y * (a3
- + y * (a4
- + y * a5)))));
- end reduced_sin;
-
-
- function reduced_cos (x: in Float) return Float is
- --
- -- return cos x for x in the interval [0,pi/4].
- -- the coefficients are a chebychev-economized taylor series for
- -- cos((1-x)*pi/8) on [-1,1],
- -- with absolute error (omitting roundoff) < 1.8e-9
- --
- -- scale factor for size of interval [0,pi/4] already reckoned in below
- a0: constant := 9.23879532410434e-01;
- a1: constant := 3.82683402033700e-01;
- a2: constant := -4.61939745322990e-01;
- a3: constant := -6.37789978530365e-02;
- a4: constant := 3.84943015536361e-02;
- a5: constant := 3.16859376215302e-03;
- a6: constant := -1.27611586733778e-03;
-
- pio8: constant := pi / 8.0;
- y: constant Float := pio8 - x;
-
- begin
- return a0 + y * (a1
- + y * (a2
- + y * (a3
- + y * (a4
- + y * (a5
- + y * a6)))));
- end reduced_cos;
-
-
- function reduced_arctan (x: in Float) return Float is
- --
- -- returns arctan x in radians from [-pi/4,pi/4] for x in [-1,1].
- -- coefficents from Abramowitz and Stegun p81, #4.4.49 .
- -- relative error < 2e-8.
- --
- x2: constant Float := x * x;
- --
- a2: constant := -0.3333314528;
- a4: constant := 0.1999355085;
- a6: constant := -0.1420889944;
- a8: constant := 0.1065626393;
- a10: constant := -0.0752896400;
- a12: constant := 0.0429096138;
- a14: constant := -0.0161657367;
- a16: constant := 0.0028662257;
-
- begin
- return x * (1.0 + x2 * ( a2 +
- x2 * ( a4 +
- x2 * ( a6 +
- x2 * ( a8 +
- x2 * (a10 +
- x2 * (a12 +
- x2 * (a14 +
- x2 * a16))))))));
- end reduced_arctan;
-
-
- ----------------------------------------------------------------------
- -- implementations of user-callable fns
-
-
- function sqrt (x : Float) return Float is
- --
- -- Declaration:
- -- function SQRT (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- SQRT(X) ~ sqrt(X)
- -- Usage:
- -- Z := SQRT(X);
- -- Domain:
- -- X >= 0.0
- -- Range:
- -- SQRT(X) >= 0.0
- -- Accuracy:
- -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) SQRT(0.0) = 0.0
- -- Notes:
- -- (a) The upper bound of the reachable range of SQRT is approximately given by
- -- SQRT(X)<=sqrt(FLOAT_TYPE'SAFE_LARGE)
- -- (b) Other standards might impose additional constraints on SQRT. For
- -- example, the IEEE standards for binary and radix-independent
- -- floating-point arithmetic require greater accuracy in the result of SQRT
- -- than this standard requires, and they require that SQRT(-0.0)=-0.0.
- -- An implementation of SQRT in GENERIC_ELEMENTARY_FUNCTIONS that
- -- conforms to this standard will conform to those other standards if it
- -- satisfies their additional constraints.
- --
- begin
- if x > 0.0 then
- declare
- expo: integer := exponent( x);
- scaling: integer; -- for scaling of result
- reduced_arg, estimate: Float;
- --
- -- minimax approximation constants from Hart p192, SQRT 0290
- -- adjusted for an interval of [1/2,2] instead of [1/4,1].
- p1: constant := 3.09033297;
- p0: constant := 1.0000233;
- q0: constant := 3.0903708;
- begin
- if expo >= 0 then
- scaling := expo / 2;
- else
- scaling := (expo - 1) / 2;
- end if;
- reduced_arg := scale( x, - 2 * scaling);
- -- 0.5 <= reduced_arg < 2.0
- --
- -- rational approx of order 1,1 to sqrt( reduced_arg)
- estimate := (p1 * reduced_arg + p0) / (reduced_arg + q0);
- -- which has 2**-8.6 max relative error.
- -- need only 2**-5.3 to support iteration to follow.
- -- Now apply Heron's iteration twice
- estimate := 0.5 * (estimate + reduced_arg / estimate);
- estimate := 0.5 * (estimate + reduced_arg / estimate);
- return scale( estimate, scaling);
- end;
- elsif x = 0.0 then
- return 0.0;
- else
- raise argument_error;
- end if;
- end sqrt;
-
-
- function log (x : Float) return Float is
- --
- -- Declaration:
- -- function LOG (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- LOG(X) ~ ln(X)
- -- Usage:
- -- Z := LOG(X); -- natural logarithm
- -- Domain:
- -- X > 0.0
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) LOG(1.0) = 0.0
- -- Notes:
- -- The reachable range of LOG is approximately given by
- -- ln(FLOAT_TYPE'SAFE_SMALL) <= LOG(X) <= ln(FLOAT_TYPE'SAFE_LARGE)
- --
- begin
- if x > 0.0 then
- -- log_2( 1.0) = 0.0
- return log_2( x) * nat_log_2;
- else
- raise argument_error;
- end if;
- end log;
-
-
- function log (x, base : Float) return Float is
- --
- -- Declaration:
- -- function LOG (X, BASE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- LOG(X,BASE) ~ log to base BASE of X
- -- Usage:
- -- Z := LOG(X, 10.0); -- base 10 logarithm
- -- Z := LOG(X, 2.0); -- base 2 logarithm
- -- Z := LOG(X, BASE); -- base BASE logarithm
- -- Domain:
- -- (a) X > 0.0
- -- (b) BASE > 0.0
- -- (c) BASE /= 1.0
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) LOG(1.0,BASE) = 0.0
- -- Notes:
- -- (a) When BASE > 1.0, the reachable range of LOG is approximately given by
- -- log to base BASE of FLOAT_TYPE'SAFE_SMALL <= LOG(X,BASE) <=
- -- log to base BASE of FLOAT_TYPE'SAFE_LARGE
- -- (b) When 0.0 < BASE < 1.0, the reachable range of LOG is approximately given by
- -- log to base BASE of FLOAT_TYPE'SAFE_LARGE <= LOG(X,BASE) <=
- -- log to base BASE of FLOAT_TYPE'SAFE_SMALL
- --
- begin
- if x > 0.0 and base > 0.0 and base /= 1.0 then
- -- log_2( 1.0) = 0.0
- return log_2( x) / log_2( base);
- else
- raise argument_error;
- end if;
- end log;
-
-
- function exp (x : Float) return Float is
- --
- -- Declaration:
- -- function EXP (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- EXP(X) ~ e raised to the X power
- -- Usage:
- -- Z := EXP(X); -- e raised to the power X
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- EXP(X) >= 0.0
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) EXP(0.0) = 1.0
- -- Notes:
- -- The usable domain of EXP is approximately given by
- -- X <= ln(FLOAT_TYPE'SAFE_LARGE)
- --
- minarg: constant Float := fminexp * nat_log_2;
- maxarg: constant Float := fmaxexp * nat_log_2;
-
- begin
- if x >= maxarg then
- raise overflow_error;
- elsif x < minarg then
- return 0.0;
- else
- declare
- r: Float;
- n: integer;
- begin
- ln2_reduce( x, r, n);
- return scale( reduced_two_to_the( r / nat_log_2), n);
- end;
- end if;
- end exp;
-
-
- function "**" (x, y : Float) return Float is
- --
- -- Declaration:
- -- function "**" (X, Y : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- X ** Y ~ X raised to the power Y
- -- Usage:
- -- Z := X ** Y; -- X raised to the power Y
- -- Domain:
- -- (a) X >= 0.0
- -- (b) Y > 0.0 when X = 0.0
- -- Range:
- -- X ** Y >= 0.0
- -- Accuracy:
- -- (a) Maximum relative error (when X > 0.0) =
- -- (4.0+|Y*ln(X)|/32.0) * FLOAT_TYPE'BASE'EPSILON
- -- (b) X ** 0.0 = 1.0 when X > 0.0
- -- (c) 0.0 ** Y = 0.0 when Y > 0.0
- -- (d) X ** 1.0 = X
- -- (e) 1.0 ** Y =1.0
- -- Notes:
- -- The usable domain of "**", when X > 0.0, is approximately the set of
- -- values for X and Y satisfying
- -- Y*ln(X) <= ln(FLOAT_TYPE'SAFE_LARGE)
- -- This imposes a positive upper bound on Y (as a function of X) when
- -- X > 1.0, and a negative lower bound on Y (as a function of X) when
- -- 0.0 < X < 1.0.
- --
- function half_precision (r: Float) return Float is
- -- truncate to half as many bits in the mantissa
- begin
- return shorten( r, Float'machine_mantissa / 2);
- end half_precision;
- pragma inline( half_precision);
-
- begin -- function "**"
- if x = 1.0 then
- return 1.0; -- 1**y
- elsif x > 0.0 then
- if y = 0.0 then
- return 1.0; -- x**0
- elsif y = 1.0 then
- return x; -- x**1
- else
- declare -- general case of 2**( y*log_2( x))
- sy: Float := y;
- fex, ma, logx: Float;
- begin
- log_2_parts( fex, ma, x);
- logx := fex + ma;
- -- If y*log_2( x) overflows positively, we don't care
- -- at what point the overflow exception happens.
- -- Large negative values of y*log_2( x) are detected by
- -- the following conservative test, which is equivalent to
- -- sy * logx < 2 * fminexp, but will not overflow,
- -- because abs( logx) < 2 * abs( fminexp).
- if (sy / (2.0 * fminexp)) * logx > 1.0 then
- return 0.0; -- underflow
- else
- declare
- -- y*log_2( x) must have 7 extra bits of accuracy
- -- to get full precision in the mantissa of the result.
- -- so we split up the factors in two parts
- y1: constant Float := half_precision( sy);
- y2: constant Float := sy - y1;
- -- sy = y1 + y2 exactly
- w1: constant Float := half_precision( logx);
- w2: constant Float := (fex - w1) + ma;
- -- fex + ma = w1 + w2 exactly
- --
- -- what we need is (y1+y2)*(w1+w2) very accurately
- y1w1: constant Float := y1*w1; -- exactly
- n: constant Float := floor( y1w1 + 0.5);
- frac: constant Float := (y1w1 - n) +
- (sy * w2 + y2 * w1);
- -- rounded to within 2**-Float'mantissa,
- -- frac is nearly confined to [-.5,.5] .
- begin
- return scale( reduced_two_to_the( frac), integer( n));
- end;
- end if;
- end;
- end if;
- elsif x = 0.0 and y > 0.0 then
- return 0.0; -- 0**y
- else
- raise argument_error; -- x < 0 or x = 0 and y <= 0
- end if;
- end "**";
-
-
- function sin (x : Float) return Float is
- --
- -- Declaration:
- -- function SIN (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- SIN(X) ~ sin(X)
- -- Usage:
- -- Z := SIN(X); -- X in radians
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- |SIN(X)| <= 1.0
- -- Accuracy:
- -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON
- -- when |X| is less than or equal to some documented implementation-dependent
- -- threshold, which must not be less than
- -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2)
- -- For larger values of |X|, degraded accuracy is allowed. An implementation
- -- must document its behavior for large |X|.
- -- (b) SIN(0.0) = 0.0
- --
- r: Float;
- oct: octant_number;
-
- begin
- octant_reduce( x, r, oct);
- case oct is
- when 0| 3 =>
- return reduced_sin( r);
- when 1| 2 =>
- return reduced_cos( r);
- when 4| 7 =>
- return - reduced_sin( r);
- when 5| 6 =>
- return - reduced_cos( r);
- end case;
- end sin;
-
-
- function sin (x, cycle : Float) return Float is
- --
- -- Declaration:
- -- function SIN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- SIN(X,CYCLE) ~ sin(2Pi * X/CYCLE)
- -- Usage:
- -- Z := SIN(X, 360.0); -- X in degrees
- -- Z := SIN(X, 1.0); -- X in bams (binary angular measure)
- -- Z := SIN(X, CYCLE); -- X in units such that one complete
- -- -- cycle of rotation corresponds to
- -- -- X = CYCLE
- -- Domain:
- -- (a) X mathematically unbounded
- -- (b) CYCLE > 0.0
- -- Range:
- -- |SIN(X,CYCLE)| <= 1.0
- -- Accuracy:
- -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) For integer k, SIN(X,CYCLE)= 0.0 when X=k*CYCLE/2.0
- -- 1.0 when X=(4k+1)*CYCLE/4.0
- -- -1.0 when X=(4k+3)*CYCLE/4.0
- --
- r: Float;
- oct: octant_number;
-
- begin
- if cycle > 0.0 then
- period_reduce( x, cycle, r, oct);
- r := r / cycle * two_pi; -- underflow denormal allowed
- case oct is
- when 0| 3 =>
- return reduced_sin( r);
- when 1| 2 =>
- return reduced_cos( r);
- when 4| 7 =>
- return - reduced_sin( r);
- when 5| 6 =>
- return - reduced_cos( r);
- end case;
- else
- raise argument_error;
- end if;
- end sin;
-
-
- function cos (x : Float) return Float is
- --
- -- Declaration:
- -- function COS (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- COS(X) ~ cos(X)
- -- Usage:
- -- Z := COS(X); -- X in radians
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- |COS(X)| <= 1.0
- -- Accuracy:
- -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON
- -- when |X| is less than or equal to some documented implementation-dependent
- -- threshold, which must not be less than
- -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2)
- -- For larger values of |X|, degraded accuracy is allowed. An implementation
- -- must document its behavior for large |X|.
- -- (b) COS(0.0) = 1.0
- --
- r: Float;
- oct: octant_number;
-
- begin
- octant_reduce( x, r, oct);
- case oct is
- when 0| 7 =>
- return reduced_cos( r);
- when 1| 6 =>
- return reduced_sin( r);
- when 2| 5 =>
- return - reduced_sin( r);
- when 3| 4 =>
- return - reduced_cos( r);
- end case;
- end cos;
-
-
- function cos (x, cycle : Float) return Float is
- --
- -- Declaration:
- -- function COS (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- COS(X,CYCLE) ~ cos(2Pi*X/CYCLE)
- -- Usage:
- -- Z := COS(X, 360.0); -- X in degrees
- -- Z := COS(X, 1.0); -- X in bams
- -- Z := COS(X, CYCLE); -- X in units such that one complete
- -- -- cycle of rotation corresponds to
- -- -- X = CYCLE
- -- Domain:
- -- (a) X mathematically unbounded
- -- (b) CYCLE > 0.0
- -- Range:
- -- |COS(X,CYCLE)| <= 1.0
- -- Accuracy:
- -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) For integer k, COS(X,CYCLE) = 1.0 when X=k*CYCLE
- -- 0.0 when X=(2k+1)*CYCLE/4.0
- -- -1.0 when X=(2k+1)*CYCLE/2.0
- --
- r: Float;
- oct: octant_number;
-
- begin
- if cycle > 0.0 then
- period_reduce( x, cycle, r, oct);
- r := r / cycle * two_pi; -- underflow denormal allowed
- case oct is
- when 0| 7 =>
- return reduced_cos( r);
- when 1| 6 =>
- return reduced_sin( r);
- when 2| 5 =>
- return - reduced_sin( r);
- when 3| 4 =>
- return - reduced_cos( r);
- end case;
- else
- raise argument_error;
- end if;
- end cos;
-
-
- function tan (x : Float) return Float is
- --
- -- Declaration:
- -- function TAN (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- TAN(X) ~ tan(X)
- -- Usage:
- -- Z := TAN(X); -- X in radians
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- when |X| is less than or equal to some documented implementation-dependent
- -- threshold, which must not be less than
- -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2)
- -- For larger values of |X|, degraded accuracy is allowed. An implementation
- -- must document its behavior for large |X|.
- -- (b) TAN(0.0) = 0.0
- --
- r: Float;
- oct: octant_number;
-
- begin
- octant_reduce( x, r, oct);
- case oct is
- when 0| 4 =>
- return reduced_sin( r) / reduced_cos( r);
- when 1| 5 =>
- if r /= 0.0 then
- return reduced_cos( r) / reduced_sin( r);
- else -- x must have been huge
- return 0.0;
- end if;
- when 2| 6 =>
- if r /= 0.0 then
- return - reduced_cos( r) / reduced_sin( r);
- else -- x must have been huge
- return 0.0;
- end if;
- when 3| 7 =>
- return - reduced_sin( r) / reduced_cos( r);
- end case;
- end tan;
-
-
- function tan (x, cycle : Float) return Float is
- --
- -- Declaration:
- -- function TAN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- TAN(X,CYCLE) ~ tan(2Pi*X/CYCLE)
- -- Usage:
- -- Z := TAN(X, 360.0); -- X in degrees
- -- Z := TAN(X, 1.0); -- X in bams
- -- Z := TAN(X, CYCLE); -- X in units such that one complete
- -- -- cycle of rotation corresponds to
- -- -- X = CYCLE
- -- Domain:
- -- (a) X /= (2k+1)*CYCLE/4.0, for integer k
- -- (b) CYCLE > 0.0
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) TAN(X,CYCLE) = 0.0 when X=k*CYCLE/2.0, for integer k
- --
- r: Float;
- oct: octant_number;
-
- begin
- if cycle > 0.0 then
- period_reduce( x, cycle, r, oct);
- r := r / cycle * two_pi; -- underflow denormal allowed
- case oct is
- when 0| 4 =>
- return reduced_sin( r) / reduced_cos( r);
- when 1| 5 =>
- if r /= 0.0 then
- return reduced_cos( r) / reduced_sin( r);
- else
- raise argument_error;
- end if;
- when 2| 6 =>
- if r /= 0.0 then
- return - reduced_cos( r) / reduced_sin( r);
- else
- raise argument_error;
- end if;
- when 3| 7 =>
- return - reduced_sin( r) / reduced_cos( r);
- end case;
- else
- raise argument_error;
- end if;
- end tan;
-
-
- function cot (x : Float) return Float is
- --
- -- Declaration:
- -- function COT (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- COT(X) ~ cot(X)
- -- Usage:
- -- Z := COT(X); -- X in radians
- -- Domain:
- -- X /= 0.0
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- when |X| is less than or equal to some documented implementation-dependent
- -- threshold, which must not be less than
- -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2)
- -- For larger values of |X|, degraded accuracy is allowed. An implementation
- -- must document its behavior for large |X|.
- --
- r: Float;
- oct: octant_number;
-
- begin
- if x /= 0.0 then
- octant_reduce( x, r, oct);
- case oct is
- when 0| 4 =>
- if r /= 0.0 then
- return reduced_cos( r) / reduced_sin( r);
- else -- x must have been huge
- return 0.0;
- end if;
- when 1| 5 =>
- return reduced_sin( r) / reduced_cos( r);
- when 2| 6 =>
- return - reduced_sin( r) / reduced_cos( r);
- when 3| 7 =>
- if r /= 0.0 then
- return - reduced_cos( r) / reduced_sin( r);
- else -- x must have been huge
- return 0.0;
- end if;
- end case;
- else
- raise argument_error;
- end if;
- end cot;
-
-
- function cot (x, cycle : Float) return Float is
- --
- -- Declaration:
- -- function COT (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- COT(X,CYCLE) ~ cot(2Pi*X/CYCLE)
- -- Usage:
- -- Z := COT(X, 360.0); -- X in degrees
- -- Z := COT(X, 1.0); -- X in bams
- -- Z := COT(X, CYCLE); -- X in units such that one complete
- -- -- cycle of rotation corresponds to
- -- -- X = CYCLE
- -- Domain:
- -- (a) X /= k*CYCLE/2.0, for integer k
- -- (b) CYCLE > 0.0
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) COT(X,CYCLE) = 0.0 when X=(2k+1)*CYCLE/4.0, for integer k
- --
- r: Float;
- oct: octant_number;
-
- begin
- if cycle > 0.0 then
- period_reduce( x, cycle, r, oct);
- r := r / cycle * two_pi; -- underflow denormal allowed
- case oct is
- when 0| 4 =>
- if r /= 0.0 then
- return reduced_cos( r) / reduced_sin( r);
- else
- raise argument_error;
- end if;
- when 1| 5 =>
- return reduced_sin( r) / reduced_cos( r);
- when 2| 6 =>
- return - reduced_sin( r) / reduced_cos( r);
- when 3| 7 =>
- if r /= 0.0 then
- return - reduced_cos( r) / reduced_sin( r);
- else
- raise argument_error;
- end if;
- end case;
- else
- raise argument_error;
- end if;
- end cot;
-
-
- function arcsin (x : Float) return Float is
- --
- -- Declaration:
- -- function ARCSIN (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCSIN(X) ~ arcsine(X)
- -- Usage:
- -- Z := ARCSIN(X); -- Z in radians
- -- Domain:
- -- |X| <= 1.0
- -- Range:
- -- |ARCSIN(X)| <= Pi/2
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCSIN(0.0) = 0.0
- -- (c) ARCSIN(1.0) = Pi/2
- -- (d) ARCSIN(-1.0) = -Pi/2
- -- Notes:
- -- - Pi/2 and Pi/2 are not safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (c) or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- begin
- if abs( x) <= 1.0 then
- return arctan( y => x, x => complement( x));
- else
- raise argument_error;
- end if;
- end arcsin;
-
-
- function arcsin (x, cycle : Float) return Float is
- --
- -- Declaration:
- -- function ARCSIN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCSIN(X,CYCLE) ~ arcsin(X)*CYCLE/2Pi
- -- Usage:
- -- Z := ARCSIN(X, 360.0); -- Z in degrees
- -- Z := ARCSIN(X, 1.0); -- Z in bams
- -- Z := ARCSIN(X, CYCLE); -- Z in units such that one complete
- -- -- cycle of rotation corresponds to
- -- -- Z = CYCLE
- -- Domain:
- -- (a) |X| <= 1.0
- -- (b) CYCLE > 0.0
- -- Range:
- -- |ARCSIN(X,CYCLE) <= CYCLE/4.0
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCSIN(0.0,CYCLE) = 0.0
- -- (c) ARCSIN(1.0,CYCLE) = CYCLE/4.0
- -- (d) ARCSIN(-1.0,CYCLE) = -CYCLE/4.0
- -- Notes:
- -- - CYCLE/4.0 and CYCLE/4.0
- -- might not be safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (c) or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- begin
- if abs( x) <= 1.0 and cycle > 0.0 then
- return arctan( y => x,
- x => complement( x),
- cycle => cycle);
- else
- raise argument_error;
- end if;
- end arcsin;
-
-
- function arccos (x : Float) return Float is
- --
- -- Declaration:
- -- function ARCCOS (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCCOS(X) ~ arccos(X)
- -- Usage:
- -- Z := ARCCOS(X); -- Z in radians
- -- Domain:
- -- |X| <= 1.0
- -- Range:
- -- 0.0 <= ARCCOS(X) <= Pi
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCCOS(1.0) = 0.0
- -- (c) ARCCOS(0.0) = Pi/2
- -- (d) ARCCOS(-1.0) = Pi
- -- Notes:
- -- Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (c) or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- begin
- if abs( x) <= 1.0 then
- return arctan( y => complement( x), x => x);
- else
- raise argument_error;
- end if;
- end arccos;
-
-
- function arccos (x, cycle : Float) return Float is
- --
- -- Declaration:
- -- function ARCCOS (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCCOS(X,CYCLE) ~ arccos(X)*CYCLE/2Pi
- -- Usage:
- -- Z := ARCCOS(X, 360.0); -- Z in degrees
- -- Z := ARCCOS(X, 1.0); -- Z in bams
- -- Z := ARCCOS(X, CYCLE); -- Z in units such that one complete
- -- -- cycle of rotation corresponds to
- -- -- Z = CYCLE
- -- Domain:
- -- (a) |X| <= 1.0
- -- (b) CYCLE > 0.0
- -- Range:
- -- 0.0 <= ARCCOS(X,CYCLE) <= CYCLE/2.0
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCCOS(1.0,CYCLE) = 0.0
- -- (c) ARCCOS(0.0,CYCLE) = CYCLE/4.0
- -- (d) ARCCOS(-1.0,CYCLE) = CYCLE/2.0
- -- Notes:
- -- CYCLE/4.0 and CYCLE/2.0
- -- might not be safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (c) or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- begin
- if abs( x) <= 1.0 and cycle > 0.0 then
- return arctan( y => complement( x),
- x => x,
- cycle => cycle);
- else
- raise argument_error;
- end if;
- end arccos;
-
-
- function arctan (y : Float;
- x : Float := 1.0) return Float is
- --
- -- Declaration:
- -- function ARCTAN (Y : FLOAT_TYPE;
- -- X : FLOAT_TYPE := 1.0) return FLOAT_TYPE;
- -- Definition:
- -- (a) ARCTAN(Y) ~ arctan(Y)
- -- (b) ARCTAN(Y,X) ~ arctan(Y/X) when X >= 0.0
- -- arctan(Y/X)+Pi when X < 0.0 and Y >= 0.0
- -- arctan(Y/X)-Pi when X < 0.0 and Y < 0.0
- -- Usage:
- -- Z := ARCTAN(Y); -- Z, in radians, is the angle (in the
- -- -- quadrant containing the point (1.0,Y))
- -- -- whose tangent is Y
- -- Z := ARCTAN(Y, X); -- Z, in radians, is the angle (in the
- -- -- quadrant containing the point (X,Y))
- -- -- whose tangent is Y/X
- -- Domain:
- -- X /= 0.0 when Y = 0.0
- -- Range:
- -- (a) |ARCTAN(Y)| <= Pi/2
- -- (b) 0.0 < ARCTAN(Y,X) <= Pi when Y >= 0.0
- -- (c) -Pi <= ARCTAN(Y,X) <= 0.0 when Y < 0.0
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCTAN(0.0) = 0.0
- -- (c) ARCTAN((0.0,X) = 0.0 when X > 0.0
- -- Pi when X < 0.0
- -- (d) ARCTAN(Y,0.0) = Pi/2 when Y > 0.0
- -- -Pi/2 when Y < 0.0
- -- Notes:
- -- -Pi,-Pi/2,Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (c) or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- begin
- if x = 0.0 and y = 0.0 then
- raise argument_error;
- elsif x >= y then
- if x >= - y then
- return reduced_arctan( y / x); -- near +x axis
- else
- return - half_pi - reduced_arctan( x / y); -- near -y axis
- end if;
- elsif x >= - y then
- return half_pi - reduced_arctan( x / y); -- near +y axis
- elsif y >= 0.0 then
- return pi + reduced_arctan( y / x); -- just above -x axis
- else
- return - pi + reduced_arctan( y / x); -- just below -x axis
- end if;
- end arctan;
-
-
- function arctan (y : Float;
- x : Float := 1.0;
- cycle : Float) return Float is
- --
- -- Declaration:
- -- function ARCTAN (Y : FLOAT_TYPE;
- -- X : FLOAT_TYPE := 1.0;
- -- CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- (a) ARCTAN(Y,CYCLE) ~ arctan(Y)*CYCLE/2Pi
- -- (b) ARCTAN(Y,X,CYCLE) ~ arctan(Y/X)*CYCLE/2Pi when X >= 0.0
- -- (arctan(Y/X)+Pi)*CYCLE/2Pi when X < 0.0 and Y >= 0.0
- -- (arctan(Y/X)-Pi)*CYCLE/2Pi when X < 0.0 and Y < 0.0
- -- Usage:
- -- Z := ARCTAN(Y, CYCLE => 360.0); -- Z, in degrees, is the
- -- -- angle (in the quadrant
- -- -- containing the point
- -- -- (1.0,Y)) whose tangent is Y
- -- Z := ARCTAN(Y, CYCLE => 1.0); -- Z, in bams, is the
- -- -- angle (in the quadrant
- -- -- containing the point
- -- -- (1.0,Y)) whose tangent is Y
- -- Z := ARCTAN(Y, CYCLE => CYCLE); -- Z, in units such that one
- -- -- complete cycle of rotation
- -- -- corresponds to Z = CYCLE,
- -- -- is the angle (in the
- -- -- quadrant containing the
- -- -- point (1.0,Y)) whose
- -- -- tangent is Y
- -- Z := ARCTAN(Y, X, 360.0); -- Z, in degrees, is the
- -- -- angle (in the quadrant
- -- -- containing the point (X,Y))
- -- -- whose tangent is Y/X
- -- Z := ARCTAN(Y, X, 1.0); -- Z, in bams, is the
- -- -- angle (in the quadrant
- -- -- containing the point (X,Y))
- -- -- whose tangent is Y/X
- -- Z := ARCTAN(Y, X, CYCLE); -- Z, in units such that one
- -- -- complete cycle of rotation
- -- -- corresponds to Z = CYCLE,
- -- -- is the angle (in the
- -- -- quadrant containing the
- -- -- point (X,Y)) whose
- -- -- tangent is Y/X
- -- Domain:
- -- (a) X /= 0.0 when Y = 0.0
- -- (b) CYCLE > 0.0
- -- Range:
- -- (a) |ARCTAN(Y,CYCLE)| <= CYCLE/4.0
- -- (b) 0.0 <= ARCTAN(Y,X,CYCLE) <= CYCLE/2.0 when Y >= 0.0
- -- (c) -CYCLE/2.0 <= ARCTAN(Y,X,CYCLE) <= 0.0 when Y < 0.0
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCTAN(0.0,CYCLE) = 0.0
- -- (c) ARCTAN(0.0,X,CYCLE) = 0.0 when X > 0.0
- -- CYCLE/2.0 when X < 0.0
- -- (d) ARCTAN(Y,0.0,CYCLE) = CYCLE/4.0 when Y > 0.0
- -- -CYCLE/4.0 when Y < 0.0
- -- Notes:
- -- -CYCLE/2.0, -CYCLE/4.0, CYCLE/4.0 and CYCLE/2.0
- -- might not be safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (c) or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- scale: constant Float := cycle / two_pi;
-
- begin
- -- what happens if scale underflows? cycle/4 underflows?
- if cycle <= 0.0 or else (x = 0.0 and y = 0.0) then
- raise argument_error;
- elsif x >= y then
- if x >= - y then
- -- near +x axis
- return scale * reduced_arctan( y / x);
- else
- -- near -y axis
- return - cycle / 4.0 - scale * reduced_arctan( x / y);
- end if;
- elsif x >= - y then
- -- near +y axis
- return cycle / 4.0 - scale * reduced_arctan( x / y);
- elsif y >= 0.0 then
- -- just above -x axis
- return cycle / 2.0 + scale * reduced_arctan( y / x);
- else
- -- just below -x axis
- return - cycle / 2.0 + scale * reduced_arctan( y / x);
- end if;
- end arctan;
-
-
- function arccot (x : Float;
- y : Float := 1.0) return Float is
- --
- -- Declaration:
- -- function ARCCOT (X : FLOAT_TYPE;
- -- Y : FLOAT_TYPE := 1.0) return FLOAT_TYPE;
- -- Definition:
- -- (a) ARCCOT(X) ~ arccot(X)
- -- (b) ARCCOT(X,Y) ~ arccot(X/Y) when Y >= 0.0
- -- arccot(X/Y)-Pi when Y < 0.0
- -- Usage:
- -- Z := ARCCOT(X); -- Z, in radians, is the angle (in the
- -- -- quadrant containing the point (X,1.0)
- -- -- whose cotangent is X
- -- Z := ARCCOT(X, Y); -- Z, in radians, is the angle (in the
- -- -- quadrant containing the point (X,Y))
- -- -- whose cotangent is X/Y
- -- Domain:
- -- Y /= 0.0 when X = 0.0
- -- Range:
- -- (a) 0.0 <= ARCCOT(X) <= Pi
- -- (b) 0.0 <= ARCCOT(X,Y) <= Pi when Y >= 0.0
- -- (c) -Pi <= ARCCOT(X,Y) <= 0.0 when Y < 0.0
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCCOT(0.0) = Pi/2
- -- (c) ARCCOT(0.0,Y) = Pi/2 when Y > 0.0
- -- -Pi/2 when Y < 0.0
- -- (d) ARCCOT(X,0.0) = 0.0 when X > 0.0
- -- Pi when X < 0.0
- -- Notes:
- -- -Pi,-Pi/2,Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (b), (c), or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- begin
- return arctan( y => y, x => x);
- end arccot;
-
-
- function arccot (x : Float;
- y : Float := 1.0;
- cycle : Float) return Float is
- --
- -- Declaration:
- -- function ARCCOT (X : FLOAT_TYPE;
- -- Y : FLOAT_TYPE := 1.0;
- -- CYCLE : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- (a) ARCCOT(X,CYCLE) ~ arccot(X)*CYCLE/2Pi
- -- (b) ARCCOT(X,Y) ~ arccot(X/Y)*CYCLE/2Pi when Y >= 0.0
- -- (arccot(X/Y)-Pi)*CYCLE/2Pi
- -- Usage:
- -- Z := ARCCOT(X, CYCLE => 360.0); -- Z, in degrees, is the
- -- -- angle (in the quadrant
- -- -- containing the point
- -- -- (X,1.0)) whose cotangent
- -- -- is X
- -- Z := ARCCOT(X, CYCLE => 1.0); -- Z, in bams, is the
- -- -- angle (in the quadrant
- -- -- containing the point
- -- -- (X,1.0)) whose cotangent
- -- -- is X
- -- Z := ARCCOT(X, CYCLE => CYCLE); -- Z, in units such that one
- -- -- complete cycle of rotation
- -- -- corresponds to Z = CYCLE,
- -- -- is the angle (in the
- -- -- quadrant containing the
- -- -- point (X,1.0)) whose
- -- -- cotangent is X
- -- Z := ARCCOT(X, Y, 360.0); -- Z, in degrees, is the
- -- -- angle (in the quadrant
- -- -- containing the point (X,Y))
- -- -- whose cotangent is X/Y
- -- Z := ARCCOT(X, Y, 1.0); -- Z, in bams, is the
- -- -- angle (in the quadrant
- -- -- containing the point (X,Y)
- -- -- whose cotangent is X/Y
- -- Z := ARCCOT(X, Y, CYCLE); -- Z, in units such that one
- -- -- complete cycle of rotation
- -- -- corresponds to Z = CYCLE
- -- -- is the angle (in the
- -- -- quadrant containing the
- -- -- point (X,Y)) whose
- -- -- cotangent is X/Y
- -- Domain:
- -- (a) Y /= 0.0 when X = 0.0
- -- (b) CYCLE > 0.0
- -- Range:
- -- (a) 0.0 <= ARCCOT((X,CYCLE) <= CYCLE/2.0
- -- (b) 0.0 <= ARCCOT(X,Y,CYCLE) <= CYCLE/2.0 when Y >= 0.0
- -- (c) -CYCLE/2.0 <= ARCCOT(X,Y,CYCLE) <= 0.0 when Y < 0.0
- -- Accuracy:
- -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCCOT(0.0,CYCLE) = CYCLE/4.0
- -- (c) ARCCOT(0.0,Y,CYCLE) = CYCLE/4.0 when Y > 0.0
- -- -CYCLE/4.0 when Y < 0.0
- -- (d) ARCCOT(X,0.0,CYCLE) = 0.0 when X > 0.0
- -- CYCLE/2.0 when X < 0.0
- -- Notes:
- -- - CYCLE/2.0, - CYCLE/4.0, CYCLE/4.0 and CYCLE/2.0
- -- might not be safe numbers of FLOAT_TYPE. Accordingly,
- -- an implementation may exceed the range limits, but only slightly;
- -- cf. Section 9 for a precise statement of the requirements. Similarly,
- -- when accuracy requirement (b), (c), or (d) applies, an implementation may
- -- approximate the prescribed result, but only within narrow limits;
- -- cf. Section 10 for a precise statement of the requirements.
- --
- begin
- return arctan( y => y, x => x, cycle => cycle);
- end arccot;
-
-
- function sinh (x : Float) return Float is
- --
- -- Declaration:
- -- function SINH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- SINH(X) ~ sinh X
- -- Usage:
- -- Z := SINH(X);
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) SINH(0.0) = 0.0
- -- Notes:
- -- The usable domain of SINH is approximately given by
- -- |X| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0)
- --
- ax: constant Float := abs( x);
- meet_exp: constant := (mant + 1) * nat_log_2 / 2.0;
-
- begin
- if ax > meet_exp then
- -- essentially +-exp(ax)/2, but must round and overflow properly
- declare
- ex: constant Float := (exp_1 / 2.0) * exp( ax - 1.0);
- begin
- if x >= 0.0 then
- return ex;
- else
- return - ex;
- end if;
- end;
- elsif abs( x) <= 0.25 then
- -- small args need an economized taylor series, relerr<1.5e-9.
- declare
- a1: constant := 1.00000000151607e+00;
- a3: constant := 1.66666230112174e-01;
- a5: constant := 8.35195337329340e-03;
- x2: constant Float := x * x;
- begin
- return x * (a1 + x2 * (a3 + x2 * a5));
- end;
- else -- 1/4 < abs x <= meet_exp
- declare
- ex: constant Float := exp( x);
- begin
- return 0.5 * (ex - 1.0 / ex);
- end;
- end if;
- end sinh;
-
-
- function cosh (x : Float) return Float is
- --
- -- Declaration:
- -- function COSH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- COSH(X) ~ cosh X
- -- Usage:
- -- Z := COSH(X);
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- COSH(X) >= 1.0
- -- Accuracy:
- -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) COSH(0.0) = 1.0
- -- Notes:
- -- The usable domain of COSH is approximately given by
- -- |X| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0)
- --
- ax: constant Float := abs( x);
- meet_exp: constant := (mant + 1) * nat_log_2 / 2.0;
-
- begin
- if ax > meet_exp then
- -- essentially exp(ax)/2, but must round and overflow properly
- return (exp_1 / 2.0) * exp( ax - 1.0);
- else
- declare
- ex: constant Float := exp( x); -- moderate size
- begin
- return 0.5 * (ex + 1.0 / ex);
- end;
- end if;
- end cosh;
-
-
- function tanh (x : Float) return Float is
- --
- -- Declaration:
- -- function TANH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- TANH(X) ~ tanh X
- -- Usage:
- -- Z := TANH(X);
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- |TANH(X)| <= 1.0
- -- Accuracy:
- -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) TANH(0.0) = 0.0
- --
- ax: constant Float := abs( x);
- meet_exp: constant := (mant + 1) * nat_log_2 / 2.0;
-
- begin
- -- check whether exp(-ax) negligible wrt exp(ax)
- if ax > meet_exp then
- if x >= 0.0 then
- return 1.0;
- else
- return -1.0;
- end if;
- elsif ax <= 0.25 then
- -- small args need an economized taylor series, relerr<2.5e-9.
- declare
- x2: constant Float := x * x;
- a0: constant := 9.99999997535969e-01;
- a2: constant := -3.33332067277568e-01;
- a4: constant := 1.33231448939255e-01;
- a6: constant := -5.13290121422199e-02;
- begin
- return x * (a0 + x2 * (a2
- + x2 * (a4
- + x2 * a6)));
- end;
- else -- 1/4 < abs x <= meet_exp
- declare
- ex: Float := exp( x);
- begin
- return (ex - 1.0 / ex) / (ex + 1.0 / ex);
- end;
- end if;
- end tanh;
-
-
- function coth (x : Float) return Float is
- --
- -- Declaration:
- -- function COTH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- COTH(X) ~ coth X
- -- Usage:
- -- Z := COTH(X);
- -- Domain:
- -- X /= 0.0
- -- Range:
- -- |COTH(X)| >= 1.0
- -- Accuracy:
- -- Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- --
- begin
- if x /= 0.0 then
- return 1.0 / tanh( x);
- else
- raise argument_error;
- end if;
- end coth;
-
-
- function arcsinh (x : Float) return Float is
- --
- -- Declaration:
- -- function ARCSINH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCSINH(X) ~ arcsinh X
- -- Usage:
- -- Z := ARCSINH(X);
- -- Domain:
- -- Mathematically unbounded
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCSINH(0.0) = 0.0
- -- Notes:
- -- The reachable range of ARCSINH is approximately given by
- -- |ARCSINH(X)| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0)
- --
- ax: Float := abs( x);
- as: Float;
- reduce_edge: constant := sqrt_2 / 4.0;
- -- see reduced_logdel_2 for its domain definition
-
- begin
- if ax >= recip_sqrt_epsilon then
- as := 1.0 + log_2( ax);
- elsif ax >= reduce_edge then
- as := log_2( ax + sqrt( 1.0 + sqr(ax)));
- elsif ax > Float'base'epsilon then
- -- ax < sqrt(1/8)
- -- z = x+sqrt(1+x**2)
- -- (z-1)/(z+1) = x/(1+sqrt(1+x**2))
- as := reduced_logdel_2( ax / (1.0 + sqrt( 1.0 + sqr( ax))));
- else
- return x; -- sign already correct for small args
- end if;
- if x >= 0.0 then
- return nat_log_2 * as;
- else
- return - nat_log_2 * as;
- end if;
- end arcsinh;
-
-
- function arccosh (x : Float) return Float is
- --
- -- Declaration:
- -- function ARCCOSH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCCOSH(X) ~ arccosh X
- -- Usage:
- -- Z := ARCCOSH(X);
- -- Domain:
- -- X >= 1.0
- -- Range:
- -- ARCCOSH(X) >= 0.0
- -- Accuracy:
- -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCCOSH(1.0) = 0.0
- -- Notes:
- -- The upper bound of the reachable range of ARCCOSH is approximately given
- -- by ARCCOSH(X) <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0)
- --
- reduce_edge: constant := 0.75 * sqrt_2;
- -- see reduced_logdel_2 for its domain definition
-
- begin
- if x >= recip_sqrt_epsilon then
- return nat_log_2 * (1.0 + log_2( x));
- elsif x >= reduce_edge then
- return nat_log_2 * log_2( x + sqrt( sqr( x) - 1.0));
- elsif x >= 1.0 then
- -- z = x+sqrt(x**2-1)
- -- (z-1)/(z+1) = sqrt((x-1)/(x+1))
- return nat_log_2 * reduced_logdel_2( sqrt( (x - 1.0) / (x + 1.0) ));
- else
- raise argument_error;
- end if;
- end arccosh;
-
-
- function arctanh (x : Float) return Float is
- --
- -- Declaration:
- -- function ARCTANH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCTANH(X) ~ arctanh X
- -- Usage:
- -- Z := ARCTANH(X);
- -- Domain:
- -- |X| < 1.0
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- -- (b) ARCTANH(0.0) = 0.0
- --
- ax: constant Float := abs( x);
- half_ln_2: constant := nat_log_2 / 2.0;
- reduce_edge: constant := 1.0 / (3.0 + 2.0 * sqrt_2);
- -- see reduced_logdel_2 for its domain definition
-
- begin
- if ax >= 1.0 then
- raise argument_error;
- elsif ax < reduce_edge then
- return reduced_logdel_2( x) * half_ln_2;
- else
- return log_2( (1.0 + x) / (1.0 - x)) * half_ln_2;
- end if;
- end arctanh;
-
-
- function arccoth (x : Float) return Float is
- --
- -- Declaration:
- -- function ARCCOTH (X : FLOAT_TYPE) return FLOAT_TYPE;
- -- Definition:
- -- ARCCOTH(X) ~ arccoth X
- -- Usage:
- -- Z := ARCCOTH(X);
- -- Domain:
- -- |X| > 1.0
- -- Range:
- -- Mathematically unbounded
- -- Accuracy:
- -- Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON
- --
- ax: constant Float := abs( x);
- half_ln_2: constant := nat_log_2 / 2.0;
- reduce_edge: constant := 3.0 + 2.0 * sqrt_2;
- -- see reduced_logdel_2 for its domain definition
-
- begin
- if ax <= 1.0 then
- raise argument_error;
- elsif ax > reduce_edge then
- return reduced_logdel_2( 1.0 / x) * half_ln_2;
- else
- -- 1 < ax <= 3+2*sqrt2
- return log_2( (x + 1.0) / (x - 1.0)) * half_ln_2;
- end if;
- end arccoth;
-
- end Math;
-
- -- $Header: s_elementary_functions_b.ada,v 3.13 90/04/19 12:34:31 broman Rel $
-